Load packages

# Load packages
pacman::p_load(tidyverse, lubridate, ggthemes, ggrepel, cansim, janitor, gganimate)

Access Statistics Canada data

gdp_can_sample <- get_cansim(36100222) %>% filter(GEO == "Canada" & REF_DATE == 2019) %>%
  select(REF_DATE, VALUE, Prices, Estimates)

gdp <- get_cansim(36100222) %>% normalize_cansim_values() 

glimpse(gdp)
## Rows: 101,280
## Columns: 22
## $ REF_DATE                            <chr> "1981", "1981", "1981", "1981", "1~
## $ GEO                                 <chr> "Canada", "Canada", "Canada", "Can~
## $ DGUID                               <chr> "2016A000011124", "2016A000011124"~
## $ UOM                                 <chr> "Dollars", "Dollars", "Dollars", "~
## $ UOM_ID                              <chr> "81", "81", "81", "81", "81", "81"~
## $ VECTOR                              <chr> "v62787252", "v62787253", "v627872~
## $ COORDINATE                          <chr> "1.1.1", "1.1.2", "1.1.3", "1.1.4"~
## $ VALUE                               <dbl> 657758000000, 429530000000, 213179~
## $ STATUS                              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ SYMBOL                              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ TERMINATED                          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ DECIMALS                            <chr> "0", "0", "0", "0", "0", "0", "0",~
## $ GeoUID                              <chr> "11124", "11124", "11124", "11124"~
## $ `Hierarchy for GEO`                 <chr> "1", "1", "1", "1", "1", "1", "1",~
## $ `Classification Code for Prices`    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ `Hierarchy for Prices`              <chr> "1", "1", "1", "1", "1", "1", "1",~
## $ `Classification Code for Estimates` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ `Hierarchy for Estimates`           <chr> "1", "1.2", "1.2.3", "1.2.3.4", "1~
## $ val_norm                            <dbl> 657758000000, 429530000000, 213179~
## $ Date                                <date> 1981-01-01, 1981-01-01, 1981-01-0~
## $ Prices                              <fct> Chained (2012) dollars, Chained (2~
## $ Estimates                           <fct> "Final consumption expenditure", "~

Create function to create new variable called geo_short

provlist <- c("British Columbia","Alberta","Saskatchewan","Manitoba","Ontario",
              "Quebec", "New Brunswick","Nova Scotia","Prince Edward Island",
              "Newfoundland and Labrador")

short_provs <- function(df){
  df <- df %>%
    filter(GEO %in% provlist) %>%
    mutate(geo_short = case_when(
      GEO=="British Columbia" ~ "BC",
      GEO=="Alberta" ~ "AB",
      GEO=="Saskatchewan" ~ "SK",
      GEO=="Manitoba" ~ "MB",
      GEO=="Ontario" ~ "ON",
      GEO=="Quebec" ~ "QC",
      GEO=="New Brunswick" ~ "NB",
      GEO=="Prince Edward Island" ~ "PE",
      GEO=="Nova Scotia" ~ "NS",
      GEO=="Newfoundland and Labrador" ~ "NL"
    ))
  
  df$geo_short <- factor(df$geo_short,
                         levels=c("BC","AB","SK","MB","ON",
                                  "QC","NB","PE","NS","NL"))
  
  df
}

Do some data cleaning

df_gdp <- gdp %>%
  mutate(year=as.integer(REF_DATE)) %>% 
  filter(GEO %in% provlist,
         Prices=="Chained (2012) dollars",
         Estimates=="Gross domestic product at market prices") %>%
  select(GEO, year, gdp = VALUE)

df_gdp <- short_provs(df_gdp) %>%
  clean_names()
  
head(df_gdp)
## # A tibble: 6 x 4
##   geo                        year          gdp geo_short
##   <chr>                     <int>        <dbl> <fct>    
## 1 Newfoundland and Labrador  1981  15452000000 NL       
## 2 Prince Edward Island       1981   2639000000 PE       
## 3 Nova Scotia                1981  20616000000 NS       
## 4 New Brunswick              1981  16898000000 NB       
## 5 Quebec                     1981 196091000000 QC       
## 6 Ontario                    1981 308001000000 ON

Let’s start plotting!

plot_gdp <- ggplot(df_gdp, aes(geo_short, gdp/1e9, fill = geo_short, label = geo_short)) +
  geom_col()

plot_gdp

Let’s add some additional details

plot_gdp <- plot_gdp +
  geom_text(vjust = -0.1) +
  scale_fill_viridis_d() +
  scale_y_continuous(labels=scales::dollar) + 
  theme_tufte(base_size = 12) +
  theme(legend.position = "none",
        axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  labs(title = "Year: {frame_time}", x = "", y = "GDP (2012$ billions)")

plot_gdp

Let’s add some animation

plot_gdp + 
  transition_time(year)

If you want a static graph

df_gdp %>% #<<
  group_by(geo_short) %>%  #<<
  summarize(gdp_plot = mean(gdp/1e9)) %>% 
  ggplot(aes(geo_short, gdp_plot, fill = geo_short, label = geo_short)) +
  geom_col() +
  geom_text(vjust = -0.1, size = 3) +
  scale_fill_viridis_d() +
  scale_y_continuous(labels=scales::dollar) + 
  theme_tufte(base_size = 10) +
  theme(legend.position = "none",
        axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  labs(title = "Average GDP by province", x = "", y = "GDP (2012$ billions)")

But provinces have different sizes, so let’s do GDP / capita instead

pop <- get_cansim(1710000501) %>% normalize_cansim_values()

df_pop <- pop %>%
  mutate(year = as.integer(REF_DATE)) %>%
  filter(GEO %in% provlist,
         Sex=="Both sexes",
         `Age group`=="All ages") %>%
  dplyr::select(GEO, year, pop=VALUE) %>%
  clean_names()

df_gdpcap <-left_join(df_gdp,df_pop, by=c("year","geo")) %>%
  mutate(gdpcap=gdp/pop) 

Let’s graph GDP/capita

p_gdpcap <- ggplot(df_gdpcap, aes(geo_short, gdpcap, 
                                  fill = geo_short, label = geo_short)) +
  geom_col() + 
  geom_text(vjust = -0.1, check_overlap = T) +
  scale_fill_viridis_d(name="") +
  scale_y_continuous(labels=scales::dollar) + 
  theme_tufte(base_size = 12) +
  theme(legend.position = "none",
        axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  labs(title = 'Year: {frame_time}', x = '', y = 'GDP per capita ($2012)') +
  transition_time(year) +
  ease_aes('linear')  

animate(p_gdpcap, fps=5, end_pause = 30)

Let’s make it extra cool. What if we get the provinces to rank themselves every single frame? Let’s first create a ranking variable.

df_ordered <- df_gdpcap %>%
  group_by(year) %>%
  mutate(prov_order = rank(gdpcap))

Let’s plot it now. Use geom_tile instead of geom_col because it works better with the frame transitions

p_rank <- ggplot(df_ordered,
                        aes(prov_order, group = geo_short, color= geo_short, fill=geo_short)) +
  geom_tile(aes(y = gdpcap/2, 
                height = gdpcap,
                width = 0.9), alpha = 0.9) +
  # text on top of bars
  geom_text(aes(y = gdpcap, label = geo_short), hjust = -0.4) +
  # text in x-axis (requires clip = "off" in coord_cartesian)
  geom_text(aes(y = 0, label = geo_short), hjust = 1.4) +
  coord_flip(clip = "off", expand = FALSE) +
  scale_color_viridis_d(name="")+
  scale_fill_viridis_d(name="")+
  scale_y_continuous(labels=scales::dollar)+
  theme_tufte(14)+
  guides(color=F,fill=F)+
  labs(title='{frame_time}', x = "",y="GDP per capita ($2012)") +
  theme(plot.title = element_text(hjust = 1, size = 22),
        axis.ticks.y = element_blank(),
        axis.text.y  = element_blank()) + 
  transition_time(year)+
  ease_aes('cubic-in-out')

animate(p_rank, nframes = 100, fps = 5, end_pause = 20)